This is the exploration of the Census data from 2001-2016. Click on the ‘Code’ button on the right to see the codes.
First we download the packages.
library(readxl)
library(dplyr)
library(ggplot2)
library(tidyr)
library(base)
library(DT)
library(knitr)
library(plotly)
library(rsconnect)
library(rmarkdown)
Next Load the Data.
load("~/Dropbox/MRP/2001.Rdata")
load("~/Dropbox/MRP/2006.Rdata")
load("~/Dropbox/MRP/2011.Rdata")
load("~/Dropbox/MRP/2016.Rdata")
This study aims to explore the difference in income between immigrants and citizens in various occupations in Nova Scotia from 2001 to 2016, and how this difference has changed over time. Using census microdata from 2001-2016, this research will analyze the income of immigrants in 20-30 different occupations in Nova Scotia, and compare it to the income of Canadian citizens in the same occupations. This will allow for an examination of how the income penalty has changed over time, and how it may be influenced by economic and contextual factors. The study will also examine trend data on the differences in income between immigrant and citizen status over the past 16 years, in order to understand how the income gap has changed over time. This research will provide valuable insights into the income penalty faced by immigrants in specifically in Nova Scotia as well as all of Canada, and identify which occupations are most affected by immigration status. It will also allow for an examination of the factors that may influence the income gap between immigrants and citizens, such as human capital, age, region of origin, and length of time spent in Canada. By understanding the income penalty faced by immigrants, and how it has changed over time, this study has the potential to inform policy and practice aimed at reducing these disparities and supporting the successful integration of immigrants into the labour market.
Data for this study was derived from the Census Public Microdata Files (PUMF) for the years 2001, 2006, 2011 and 2016. This data is cross-sectional, incorporating data from all provinces in Canada, ages, immigrant status, labour status, occupation categories and wages and salaries. The Census PUMF files enabled accessible data that differentiated income, occupation and immigration status for people in Canada in different provinces.
Examine the following data for the 4 census years by looking at the structure of the data. Here you can also see the unique values for each variable except for wages as they are a continuous variable.
str(df01)
## tibble [801,055 × 7] (S3: tbl_df/tbl/data.frame)
## $ prov : num [1:801055] 10 10 10 10 10 10 10 10 10 10 ...
## $ age : num [1:801055] 14 9 3 8 14 5 11 11 2 0 ...
## $ immpop : num [1:801055] 1 1 1 1 1 1 1 1 1 1 ...
## $ hours : num [1:801055] 9 9 9 9 9 9 9 9 9 9 ...
## $ occupation: num [1:801055] 99 99 99 99 99 99 99 99 99 99 ...
## $ industry : num [1:801055] 99 99 99 99 99 99 99 99 99 99 ...
## $ wage : num [1:801055] 1e+07 1e+07 1e+07 1e+07 1e+07 ...
for (col in names(df01)) {
if (col == "wage") {
next
}
print(paste("Unique values in column", col, ":", paste(unique(df01[, col]), collapse = ", ")))
}
## [1] "Unique values in column prov : c(10, 11, 12, 13, 24, 35, 46, 47, 48, 59, 60)"
## [1] "Unique values in column age : c(14, 9, 3, 8, 5, 11, 2, 0, 12, 4, 13, 10, 6, 1, 7, 42, 47, 55, 40, 51, 30, 54, 53, 44, 50, 45, 43, 32, 60, 56, 34, 46, 36, 52, 35, 49, 41, 37, 62, 57, 38, 39, 58, 31, 64, 27, 48, 33, 29, 61, 59, 63, 67, 65, 28, 25, 66, 26, 72, 84, 71, 70, 24, 73, 68, 74, 23, 78, 69, 75, 77, 80, 76, 85, 79, 21, 20, 22, 18, 15, 19, 17, 82, 81, 83, 16)"
## [1] "Unique values in column immpop : c(1, 3, 2, 8)"
## [1] "Unique values in column hours : c(9, 1, 2)"
## [1] "Unique values in column occupation : c(99, 13, 1, 7, 2, 3, 23, 6, 11, 12, 9, 20, 18, 21, 4, 19, 22, 8, 17, 5, 24, 10, 15, 25, 14, 16)"
## [1] "Unique values in column industry : c(99, 18, 6, 20, 16, 10, 12, 5, 1, 8, 17, 15, 14, 7, 9, 11, 3, 4, 19, 98, 13, 2)"
str(df06)
## tibble [844,476 × 7] (S3: tbl_df/tbl/data.frame)
## $ prov : num [1:844476] 24 59 47 24 24 24 35 35 59 24 ...
## $ age : num [1:844476] 13 1 2 15 9 1 13 21 5 5 ...
## $ immpop : num [1:844476] 3 2 2 2 2 2 2 2 3 2 ...
## $ hours : num [1:844476] 1 9 9 9 9 9 9 9 9 9 ...
## $ occupation: num [1:844476] 8 99 99 99 99 99 99 99 99 99 ...
## $ industry : num [1:844476] 16 99 99 99 99 99 99 99 99 99 ...
## $ wage : num [1:844476] 9e+03 1e+07 1e+07 0e+00 1e+00 ...
for (col in names(df06)) {
if (col == "wage") {
next
}
print(paste("Unique values in column", col, ":", paste(unique(df06[, col]), collapse = ", ")))
}
## [1] "Unique values in column prov : c(24, 59, 47, 35, 11, 13, 12, 10, 48, 46, 60)"
## [1] "Unique values in column age : c(13, 1, 2, 15, 9, 21, 5, 6, 11, 16, 20, 3, 14, 12, 7, 17, 4, 10, 18, 8, 19, 88)"
## [1] "Unique values in column immpop : c(3, 2, 1)"
## [1] "Unique values in column hours : c(1, 9, 2)"
## [1] "Unique values in column occupation : c(8, 99, 11, 5, 22, 88, 10, 15, 2, 17, 21, 4, 6, 13, 16, 7, 9, 23, 20, 24, 1, 14, 25, 19, 3, 18, 12)"
## [1] "Unique values in column industry : c(16, 99, 19, 14, 4, 88, 15, 20, 17, 10, 18, 12, 7, 1, 5, 6, 8, 9, 2, 11, 3, 13)"
str(df11)
## tibble [887,012 × 7] (S3: tbl_df/tbl/data.frame)
## $ prov : num [1:887012] 24 35 48 35 35 24 59 13 35 59 ...
## $ age : num [1:887012] 15 11 9 13 13 13 10 14 12 9 ...
## $ immpop : num [1:887012] 2 1 3 1 1 1 1 1 3 3 ...
## $ hours : num [1:887012] 1 1 9 1 9 1 1 1 2 1 ...
## $ occupation: num [1:887012] 11 7 19 6 99 24 88 6 21 23 ...
## $ industry : num [1:887012] 62 62 72 62 99 22 54 23 56 72 ...
## $ wage : num [1:887012] 0 68000 18000 78000 0 61000 9000 26000 9000 27000 ...
for (col in names(df11)) {
if (col == "wage") {
next
}
print(paste("Unique values in column", col, ":", paste(unique(df11[, col]), collapse = ", ")))
}
## [1] "Unique values in column prov : c(24, 35, 48, 59, 13, 47, 12, 60, 10, 46, 11)"
## [1] "Unique values in column age : c(15, 11, 9, 13, 10, 14, 12, 3, 1, 16, 19, 8, 6, 5, 20, 7, 18, 17, 21, 4, 2, 88)"
## [1] "Unique values in column immpop : c(2, 1, 3)"
## [1] "Unique values in column hours : c(1, 9, 2)"
## [1] "Unique values in column occupation : c(11, 7, 19, 6, 99, 24, 88, 21, 23, 27, 13, 12, 4, 9, 20, 22, 2, 14, 18, 30, 15, 25, 8, 10, 1, 16, 26, 28, 3, 5, 29, 17)"
## [1] "Unique values in column industry : c(62, 72, 99, 22, 54, 23, 56, 44, 61, 41, 91, 48, 31, 52, 51, 21, 53, 11, 81, 71, 88)"
str(df16)
## tibble [220,969 × 7] (S3: tbl_df/tbl/data.frame)
## $ prov : num [1:220969] 35 35 35 35 11 35 35 48 24 35 ...
## $ age : num [1:220969] 11 5 2 12 15 19 18 18 14 8 ...
## $ immpop : num [1:220969] 1 1 1 1 1 1 1 1 1 1 ...
## $ hours : num [1:220969] 1 9 9 1 1 9 9 9 1 1 ...
## $ occupation: num [1:220969] 24 99 99 8 6 99 99 99 19 10 ...
## $ Industry : num [1:220969] 12 99 99 9 14 99 99 99 18 12 ...
## $ wage : num [1:220969] 9.5e+04 1.0e+08 1.0e+08 1.9e+04 2.9e+04 ...
for (col in names(df16)) {
if (col == "wage") {
next
}
print(paste("Unique values in column", col, ":", paste(unique(df16[, col]), collapse = ", ")))
}
## [1] "Unique values in column prov : c(35, 11, 48, 24, 10, 59, 46, 47, 12, 13, 70)"
## [1] "Unique values in column age : c(11, 5, 2, 12, 15, 19, 18, 14, 8, 1, 16, 4, 6, 9, 88, 20, 10, 3, 13, 7, 17, 21)"
## [1] "Unique values in column immpop : c(1, 2, 3, 8)"
## [1] "Unique values in column hours : c(1, 9, 2, 8)"
## [1] "Unique values in column occupation : c(24, 99, 8, 6, 19, 10, 26, 13, 15, 20, 88, 9, 18, 7, 22, 25, 27, 28, 2, 4, 11, 21, 30, 5, 17, 12, 16, 29, 23, 14, 3, 1)"
## [1] "Unique values in column Industry : c(12, 99, 9, 14, 18, 19, 10, 15, 7, 17, 4, 6, 88, 13, 1, 5, 8, 11, 16, 3, 2)"
– prov (province)- is categorized as numbers, differing in the different census years.
– Ages– are categorized differently throughout the different years, either numeric or as age divisions.
– immpop (immigrant status)– categorized as characters 1-3 representing immgirant staus, citizens and non permanent residents. Character 8 represents statuses that are not available. These characters represent different statuses in different years.
– Hours (full time or part time)– categorized as 1, 2 and 9. 1 and 2 represent full time or part time and 9 represent values not applicable. Full time = more than 30 hours per week.
– Occupation – categorized from 1-25 (2001 & 2006) and 1-30 (2011 & 2016).
The control variables for this study limited the data to individuals above the age of 15, working in Nova Scotia in part 1 and nationwide in part 2, categorized as full time or part time workers. This study also focuses on immigrants and citizens.
The census years for 2001 and 2006, categorized the occupations from 1 to 25, and he census years for 2011 and 2016, categorized the occupations from 1 to 25.
Due to updates in Census forms, a section of the analysis will use categorization of 1-20 occupations that is cohesive with all 4 years of Census data. This format will be used on comparison of all of the data, to enhance visualization and further analysis. Occupations in census years 2001 and 2006 are categorized identically as are census years 2011 and 2016.
o1 <- c("Senior management positions",
"Other management",
"Professionals in business and finance",
"Financial, secretarial,administrative",
"Clerical occs/supervisors",
"Natural and applied sciences",
"Prof health,registered nurses,supervisors",
"Technical,assisting,related health occs",
"Social science,government,religion",
"Teachers and professors",
"Art,culture,recreation,sport",
"Wholesale,technical,insurance,real estate",
"Retail trade",
"Chefs and cooks, and other related occupations",
"Protective services",
"Childcare and home support workers",
"Travel,accommodation,recreation, sport",
"Trades and transportation",
"Construction trades",
"Other trades occupations",
"Transport and equipment operators",
"Trades helpers,labourers etc",
"Occ unique to primary industries",
"Manufacturing supervisors, operators, etc",
"Manufacturing, utilities, etc labourers")
Occ25<-c(1:25)
Occupations_2001_2006<- data.frame(Occ25,o1)
datatable(Occupations_2001_2006) %>% formatStyle(c('Occ25','o1'), backgroundColor = styleInterval(1, c('white', 'lightblue')))
o2 <- c(
"Senior management occupations",
"Specialised middle management occupations",
"Middle management occupations in retail and wholesale trade and customer services",
"Middle management occupations in trades, transportation, production and utilities",
"Professional occupations in business and finance",
"Administrative and financial supervisors and administrative occupations",
"Finance, insurance, distribution, tracking, scheduling and related business administrative occupations",
"Office support occupations",
"Professional occupations in natural and applied sciences",
"Technical occupations related to natural and applied sciences",
"Professional occupations in health (including nursing)",
"Technical and assisting occupations in health or in support of health services",
"Professional occupations in education services",
"Professional occupations in law and social, community and government services",
"Paraprofessional occupations in legal, social, community and education services",
"Public protection, care providers, educational, legal and protection support occupations",
"Professional and technical occupations in art, culture, recreation and sport",
"Retail sales supervisors and specialized sales occupations",
"Service supervisors and specialized service occupations",
"Sales representatives and salespersons - wholesale and retail trade",
"Service representatives and other customer and personal services occupations",
"Sales support occupations",
"Service support and other service occupations, n.e.c.",
"Industrial, electrical and construction trades",
"Maintenance and equipment operation trades",
"Trade helpers, construction laborers, installers, repairers and related occupations",
"Transport and heavy equipment operation and related maintenance occupations",
"Supervisors, technical occupations and workers in natural resources, agriculture and related production",
"Supervisors and operators in processing, manufacturing and utilities",
"Assemblers and laborers in processing, manufacturing and utilities")
Occ30<-c(1:30)
Occupations_2011_2016<- data.frame(Occ30,o2)
datatable(Occupations_2011_2016) %>% formatStyle(c('Occ30','o2'), backgroundColor = styleInterval(1, c('white', 'lightblue')))
# Create a data frame with two columns for the occupation and the corresponding 2006 and 2016 codes
o3 <- c("Senior management",
"Other management",
"Professional business and finance",
"Administrative and financial supervisors and administrative occupations",
"Natural and applied sciences; technical and professional occupations",
"Professional occupations in health (including nursing)",
"Technical and assisting occupations in health or in support of health services",
"Professional occupations in law and social, community and government services",
"Teachers and professors",
"Art, culture, recreation, sport",
"Wholesale, technical, insurance, real estate",
"Retail trade",
"Chefs, cooks and occupations in travel, accommodation, recreation, sport",
"Protective services and childcare and home support workers",
"Trades and transportation, construction trades and other trades occupations",
"Transport and equipment operators",
"Trades helpers, laborers etc",
"Occ unique to primary industries",
"Workers in processing, manufacturing and maintenance",
"Workers in transport and heavy equipment operation positions")
adj01_06 <- c("1", "2", "3", "4,5", "6", "7", "8", "9", "10", "11", "12", "13", "14,17", "15,16", "18,19,20", "21", "22", "23", "24", "25")
adj11_16<- c("1", "2,3,4", "5", "6,7,8", "9,10", "11", "12", "14,15", "13", "17", "18,20", "22", "19,21,23", "16", "24,25", "27", "26", "28", "29", "30")
Occnu<-c(1:20)
Combined_01_16<- data.frame(Occnu,o3, adj01_06, adj11_16)
datatable(Combined_01_16) %>% formatStyle(c('Occnu','o3','adj01_06','adj11_16'), backgroundColor = styleInterval(1, c('white', 'lightblue')))
The first step filters the data to account for our control variables and remove miscellaneous and unidentifiable data. Control variables are ages above 15 and full time or part time labor status, in Nova Scotia and in all of Canada.
TEST01<- filter(df01, age>14, age<87, hours<4, wage<8888887, prov==12, occupation<26, immpop<4, wage>2)
test01 <- TEST01[ , -c(1,2,4,6)]
TEST06<- filter(df06, age>5, age<87, hours<4, wage<8888887, prov==12, occupation<26, immpop<4, wage>2)
test06 <- TEST06[ , -c(1,2,4,6)]
TEST11<- filter(df11, age>5, age<87, hours<4, wage<8888887, prov==12, occupation<31, immpop<4, wage>2)
test11 <- TEST11[ , -c(1,2,4,6)]
TEST16<- filter(df16, age>6, age<87, hours<4, wage<88888887, prov==12, occupation<31, immpop<4, wage>2)
test16 <- TEST16[ , -c(1,2,4,6)]
TTEST01<- filter(df01, age>14, age<87, hours<4, wage<8888887, occupation<26, immpop<4, wage>2)
ttest01 <- TTEST01[ , -c(1,2,4,6)]
TTEST06<- filter(df06, age>5, age<87, hours<4, wage<8888887, occupation<26, immpop<4, wage>2)
ttest06 <- TTEST06[ , -c(1,2,4,6)]
TTEST11<- filter(df11, age>5, age<87, hours<4, wage<8888887, occupation<31, immpop<4, wage>2)
ttest11 <- TTEST11[ , -c(1,2,4,6)]
TTEST16<- filter(df16, age>6, age<87, hours<4, wage<88888887, occupation<31, immpop<4, wage>2)
ttest16 <- TTEST16[ , -c(1,2,4,6)]
d01I<-filter(test01, immpop==2)
d01I<-as.data.frame(d01I)
d01C<-filter(test01, immpop == 1)
d01C<-as.data.frame(d01C)
d06I<-filter(test06, immpop == 3)
d06I<-as.data.frame(d06I)
d06C<-filter(test06, immpop == 2)
d06C<-as.data.frame(d06C)
d11I<-filter(test11, immpop == 2)
d11I<-as.data.frame(d11I)
d11C<-filter(test11, immpop == 1)
d11C<-as.data.frame(d11C)
d16I<-filter(test16, immpop == 2)
d16I<-as.data.frame(d16I)
d16C<-filter(test16, immpop == 1)
d16C<-as.data.frame(d16C)
colnames(d01C)<-c('immpop','occupation','wage')
d01C$'occupation'<-as.character(d01C$'occupation')
colnames(d06C)<-c('immpop','occupation','wage')
d06C$'occupation'<-as.character(d06C$'occupation')
colnames(d11C)<-c('immpop','occupation','wage')
d11C$'occupation'<-as.character(d11C$'occupation')
colnames(d16C)<-c('immpop','occupation','wage')
d16C$'occupation'<-as.character(d16C$'occupation')
colnames(d01I)<-c('immpop','occupation','wage')
d01I$'occupation'<-as.character(d01I$'occupation')
colnames(d16I)<-c('immpop','occupation','wage')
d16I$'occupation'<-as.character(d16I$'occupation')
colnames(d06I)<-c('immpop','occupation','wage')
d06I$'occupation'<-as.character(d06I$'occupation')
colnames(d11I)<-c('immpop','occupation','wage')
d11I$'occupation'<-as.character(d11I$'occupation')
dd01I<-filter(ttest01, immpop==2)
dd01I<-as.data.frame(dd01I)
dd01C<-filter(ttest01, immpop == 1)
dd01C<-as.data.frame(dd01C)
dd06I<-filter(ttest06, immpop == 3)
dd06I<-as.data.frame(dd06I)
dd06C<-filter(ttest06, immpop == 2)
dd06C<-as.data.frame(dd06C)
dd11I<-filter(ttest11, immpop == 2)
dd11I<-as.data.frame(dd11I)
dd11C<-filter(ttest11, immpop == 1)
dd11C<-as.data.frame(dd11C)
dd16I<-filter(ttest16, immpop == 2)
dd16I<-as.data.frame(dd16I)
dd16C<-filter(ttest16, immpop == 1)
dd16C<-as.data.frame(dd16C)
colnames(dd01C)<-c('immpop','occupation','wage')
dd01C$'occupation'<-as.character(dd01C$'occupation')
colnames(dd06C)<-c('immpop','occupation','wage')
dd06C$'occupation'<-as.character(dd06C$'occupation')
colnames(dd11C)<-c('immpop','occupation','wage')
dd11C$'occupation'<-as.character(dd11C$'occupation')
colnames(dd16C)<-c('immpop','occupation','wage')
dd16C$'occupation'<-as.character(dd16C$'occupation')
colnames(dd01I)<-c('immpop','occupation','wage')
dd01I$'occupation'<-as.character(dd01I$'occupation')
colnames(dd16I)<-c('immpop','occupation','wage')
dd16I$'occupation'<-as.character(dd16I$'occupation')
colnames(dd06I)<-c('immpop','occupation','wage')
dd06I$'occupation'<-as.character(dd06I$'occupation')
colnames(dd11I)<-c('immpop','occupation','wage')
dd11I$'occupation'<-as.character(dd11I$'occupation')
The following are boxplot graphs to visualize the distribution of the different occupations in each year. This allows us to see the concentration of wages for the different occupations, and visualize the changes in the distribution over the 16 years.
Hover over the boxplots to see distribution information.
ggplot(d01C) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(x = "Occupation",
y = "Wages",
title = "Distribution of Citizen Income in 2001") +
theme_minimal()
ggplot(d01I) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wage",
title = "Distribution of Income for Immigrants in 2001"
) +
theme_minimal()
ggplot(d06I) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wage",
title = "Distribution of Income for Immigrants in 2006"
) +
theme_minimal()
ggplot(d11C) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wages",
title = "Distribution of Citizen Income in 2011"
) +
theme_minimal()
ggplot(d11I) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wage",
title = "Distribution of Income for Immigrants in 2011"
) +
theme_minimal()
ggplot(d16C) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wages",
title = "Distribution of Citizen Income in 2016"
) +
theme_minimal()
ggplot(d16I) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wage",
title = "Distribution of Income for Immigrants in 2016"
) +
theme_minimal()
ggplot(dd01C) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wages",
title = "Distribution of Citizen Income in 2001"
) +
theme_minimal()
ggplot(dd01I) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wage",
title = "Distribution of Income for Immigrants in 2001"
) +
theme_minimal()
ggplot(dd06I) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wage",
title = "Distribution of Income for Immigrants in 2006"
) +
theme_minimal()
ggplot(dd11C) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wages",
title = "Distribution of Citizen Income in 2011"
) +
theme_minimal()
ggplot(dd11I) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wage",
title = "Distribution of Income for Immigrants in 2011"
) +
theme_minimal()
ggplot(dd16C) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wages",
title = "Distribution of Citizen Income in 2016"
) +
theme_minimal()
ggplot(dd16I) +
aes(x = occupation, y = wage) +
geom_boxplot(fill = "#228B22") +
labs(
x = "Occupation",
y = "Wage",
title = "Distribution of Income for Immigrants in 2016"
) +
theme_minimal()
The average income of each occupation of each immigration is calculated next. This allows us to compare the income of immigrants and citizens more accurately.
# matrices for mean wages
N01mean <- matrix(0,0,0)
C01mean <- matrix(0,0,0)
I01mean <- matrix(0,0,0)
N01 <- matrix(0,0,0)
C01 <- matrix(0,0,0)
I01 <- matrix(0,0,0)
for(i in 1:25){
occ01<- matrix(0,0,0)
occ01<- filter(test01, occupation ==i)
for (j in 1:3) {
N01<- filter(occ01, immpop ==3)
C01<- filter(occ01, immpop ==1)
I01 <- filter(occ01, immpop==2)
N01mean[i]<- mean(N01$wage)
C01mean[i]<- mean(C01$wage)
I01mean[i]<-mean(I01$wage)
N01mean[is.na(N01mean)] <- 0
C01mean[is.na(N01mean)] <- 0
I01mean[is.na(N01mean)] <- 0
Mean2001<-cbind(N01mean, C01mean, I01mean)
Mean2001<-as.data.frame(Mean2001)
colnames(Mean2001)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2001[is.na(Mean2001)] <- 0
}
}
N06mean <- matrix(0,0,0)
C06mean <- matrix(0,0,0)
I06mean <- matrix(0,0,0)
N06 <- matrix(0,0,0)
C06 <- matrix(0,0,0)
I06 <- matrix(0,0,0)
for(i in 1:25){
occ06<- matrix(0,0,0)
occ06<- filter(test06, occupation ==i)
for (j in 1:3) {
N06<- filter(occ06, immpop ==1)
C06<- filter(occ06, immpop ==2)
I06 <- filter(occ06, immpop==3)
N06mean[i]<- mean(N06$wage)
C06mean[i]<- mean(C06$wage)
I06mean[i]<-mean(I06$wage)
N06mean[is.na(N06mean)] <- 0
C06mean[is.na(N06mean)] <- 0
I06mean[is.na(N06mean)] <- 0
I06mean[is.na(I06mean)] <- 0
C06mean[is.na(C06mean)] <- 0
Mean2006<-cbind(N06mean, C06mean, I06mean)
Mean2006<-as.data.frame(Mean2006)
colnames(Mean2006)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2006[is.na(Mean2006)] <- 0
}
}
N11mean <- matrix(0,0,0)
C11mean <- matrix(0,0,0)
I11mean <- matrix(0,0,0)
N11 <- matrix(0,0,0)
C11 <- matrix(0,0,0)
I11 <- matrix(0,0,0)
for(i in 1:30){
occ11<- matrix(0,0,0)
occ11<- filter(test11, occupation ==i)
for (j in 1:3) {
N11<- filter(occ11, immpop ==3)
C11<- filter(occ11, immpop ==1)
I11 <- filter(occ11, immpop==2)
N11mean[i]<- mean(N11$wage)
C11mean[i]<- mean(C11$wage)
I11mean[i]<-mean(I11$wage)
N11mean[is.na(N11mean)] <- 0
C11mean[is.na(N11mean)] <- 0
I11mean[is.na(N11mean)] <- 0
Mean2011<-cbind(N11mean, C11mean, I11mean)
Mean2011<-as.data.frame(Mean2011)
colnames(Mean2011)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2011[is.na(Mean2011)] <- 0
}
}
N16mean <- matrix(0,0,0)
C16mean <- matrix(0,0,0)
I16mean <- matrix(0,0,0)
N16 <- matrix(0,0,0)
C16 <- matrix(0,0,0)
I16 <- matrix(0,0,0)
for(i in 1:30){
occ16<- matrix(0,0,0)
occ16<- filter(test16, occupation ==i)
for (j in 1:3) {
N16<- filter(occ16, immpop ==3)
C16<- filter(occ16, immpop ==1)
I16 <- filter(occ16, immpop==2)
N16mean[i]<- mean(N16$wage)
C16mean[i]<- mean(C16$wage)
I16mean[i]<-mean(I16$wage)
N16mean[is.na(N16mean)] <- 0
C16mean[is.na(N16mean)] <- 0
I16mean[is.na(N16mean)] <- 0
Mean2016<-cbind(N16mean, C16mean, I16mean)
Mean2016<-as.data.frame(Mean2016)
colnames(Mean2016)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2016[is.na(Mean2016)] <- 0
}
}
# matrices for mean wages
N01qmean <- matrix(0,0,0)
C01qmean <- matrix(0,0,0)
I01qmean <- matrix(0,0,0)
N01q <- matrix(0,0,0)
C01q <- matrix(0,0,0)
I01q <- matrix(0,0,0)
for(i in 1:25){
occ01q<- matrix(0,0,0)
occ01q<- filter(ttest01, occupation ==i)
for (j in 1:3) {
N01q<- filter(occ01q, immpop ==3)
C01q<- filter(occ01q, immpop ==1)
I01q <- filter(occ01q, immpop==2)
N01qmean[i]<- mean(N01q$wage)
C01qmean[i]<- mean(C01q$wage)
I01qmean[i]<-mean(I01q$wage)
N01qmean[is.na(N01qmean)] <- 0
C01qmean[is.na(N01qmean)] <- 0
I01qmean[is.na(N01qmean)] <- 0
Mean2001q<-cbind(N01qmean, C01qmean, I01qmean)
Mean2001q<-as.data.frame(Mean2001q)
colnames(Mean2001q)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2001q[is.na(Mean2001q)] <- 0
}
}
N06qmean <- matrix(0,0,0)
C06qmean <- matrix(0,0,0)
I06qmean <- matrix(0,0,0)
N06q <- matrix(0,0,0)
C06q <- matrix(0,0,0)
I06q <- matrix(0,0,0)
for(i in 1:25){
occ06q<- matrix(0,0,0)
occ06q<- filter(ttest06, occupation ==i)
for (j in 1:3) {
N06q<- filter(occ06q, immpop ==1)
C06q<- filter(occ06q, immpop ==2)
I06q <- filter(occ06q, immpop==3)
N06qmean[i]<- mean(N06q$wage)
C06qmean[i]<- mean(C06q$wage)
I06qmean[i]<-mean(I06q$wage)
N06qmean[is.na(N06qmean)] <- 0
C06qmean[is.na(N06qmean)] <- 0
I06qmean[is.na(N06qmean)] <- 0
I06qmean[is.na(I06qmean)] <- 0
C06qmean[is.na(C06qmean)] <- 0
Mean2006q<-cbind(N06qmean, C06qmean, I06qmean)
Mean2006q<-as.data.frame(Mean2006q)
colnames(Mean2006q)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2006q[is.na(Mean2006q)] <- 0
}
}
N11qmean <- matrix(0,0,0)
C11qmean <- matrix(0,0,0)
I11qmean <- matrix(0,0,0)
N11q <- matrix(0,0,0)
C11q <- matrix(0,0,0)
I11q <- matrix(0,0,0)
for(i in 1:30){
occ11q<- matrix(0,0,0)
occ11q<- filter(ttest11, occupation ==i)
for (j in 1:3) {
N11q<- filter(occ11q, immpop ==3)
C11q<- filter(occ11q, immpop ==1)
I11q <- filter(occ11q, immpop==2)
N11qmean[i]<- mean(N11q$wage)
C11qmean[i]<- mean(C11q$wage)
I11qmean[i]<-mean(I11q$wage)
N11qmean[is.na(N11qmean)] <- 0
C11qmean[is.na(N11qmean)] <- 0
I11qmean[is.na(N11qmean)] <- 0
Mean201q1<-cbind(N11qmean, C11qmean, I11qmean)
Mean201q1<-as.data.frame(Mean201q1)
colnames(Mean201q1)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean201q1[is.na(Mean201q1)] <- 0
}
}
N16qmean <- matrix(0,0,0)
C16qmean <- matrix(0,0,0)
I16qmean <- matrix(0,0,0)
N16q <- matrix(0,0,0)
C16q <- matrix(0,0,0)
I16q <- matrix(0,0,0)
for(i in 1:30){
occ16q<- matrix(0,0,0)
occ16q<- filter(ttest16, occupation ==i)
for (j in 1:3) {
N16q<- filter(occ16q, immpop ==3)
C16q<- filter(occ16q, immpop ==1)
I16q <- filter(occ16q, immpop==2)
N16qmean[i]<- mean(N16q$wage)
C16qmean[i]<- mean(C16q$wage)
I16qmean[i]<-mean(I16q$wage)
N16qmean[is.na(N16qmean)] <- 0
C16qmean[is.na(N16qmean)] <- 0
I16qmean[is.na(N16qmean)] <- 0
Mean201q6<-cbind(N16qmean, C16qmean, I16qmean)
Mean201q6<-as.data.frame(Mean201q6)
colnames(Mean201q6)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean201q6[is.na(Mean201q6)] <- 0
}
}
Occupation_Number<-matrix(1:25, nrow=25, ncol=1)
Years01<-matrix(2001, nrow=25, ncol=1)
Years06<-matrix(2006, nrow=25, ncol=1)
Cit <- matrix("Citizen", nrow=25, ncol=1)
Imm <- matrix("Immigrant", nrow=25, ncol=1)
#2001
C01df<-cbind(Occupation_Number,C01mean,Cit)
C01df<-as.data.frame(C01df)
colnames(C01df)<-c("Occupation_Number","Average Income","ImmStat")
I01df<-cbind(Occupation_Number,I01mean,Imm)
I01df<-as.data.frame(I01df)
colnames(I01df)<-c("Occupation_Number","Average Income","ImmStat")
IC01<-bind_rows(C01df,I01df)
IC01<-as.data.frame(IC01)
IC01$Occupation_Number<-factor(IC01$Occupation_Number, levels=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25))
IC01$`Average Income`<-as.numeric(IC01$`Average Income`)
IC01$Occupation_Number<-as.integer(IC01$Occupation_Number)
IC01$ImmStat<-as.factor(IC01$ImmStat)
# 2006
C06df<-cbind(Occupation_Number,C06mean,Cit)
C06df<-as.data.frame(C06df)
colnames(C06df)<-c("Occupation_Number","Average Income","ImmStat")
I06df<-cbind(Occupation_Number,I06mean,Imm)
I06df<-as.data.frame(I06df)
colnames(I06df)<-c("Occupation_Number","Average Income","ImmStat")
IC06<-bind_rows(C06df,I06df)
IC06<-as.data.frame(IC06)
IC06$Occupation_Number<-factor(IC06$Occupation_Number, levels=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25))
IC06$`Average Income`<-as.numeric(IC06$`Average Income`)
IC06$Occupation_Number<-as.integer(IC06$Occupation_Number)
IC06$ImmStat<-as.factor(IC06$ImmStat)
#2011
Occupation_Number30<-matrix(1:30, nrow=30, ncol=1)
Years11<-matrix(2011, nrow=30, ncol=1)
Years16<-matrix(2016, nrow=30, ncol=1)
Cit2 <- matrix("Citizen", nrow=30, ncol=1)
Imm2 <- matrix("Immigrant", nrow=30, ncol=1)
C11df<-cbind(Occupation_Number30,C11mean,Cit2)
C11df<-as.data.frame(C11df)
colnames(C11df)<-c("Occupation_Number30","Average Income","ImmStat")
I11df<-cbind(Occupation_Number30,I11mean,Imm2)
I11df<-as.data.frame(I11df)
colnames(I11df)<-c("Occupation_Number30","Average Income","ImmStat")
IC11<-bind_rows(C11df,I11df)
IC11<-as.data.frame(IC11)
IC11$Occupation_Number30<-factor(IC11$Occupation_Number30, levels=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30))
IC11$`Average Income`<-as.numeric(IC11$`Average Income`)
IC11$Occupation_Number30<-as.integer(IC11$Occupation_Number30)
IC11$ImmStat<-as.factor(IC11$ImmStat)
C16df<-cbind(Occupation_Number30,C16mean,Cit2)
C16df<-as.data.frame(C16df)
colnames(C16df)<-c("Occupation_Number30","Average Income","ImmStat")
I16df<-cbind(Occupation_Number30,I16mean,Imm2)
I16df<-as.data.frame(I16df)
colnames(I16df)<-c("Occupation_Number30","Average Income","ImmStat")
IC16<-bind_rows(C16df,I16df)
IC16<-as.data.frame(IC16)
IC16$Occupation_Number30<-factor(IC16$Occupation_Number30, levels=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30))
IC16$`Average Income`<-as.numeric(IC16$`Average Income`)
IC16$Occupation_Number30<-as.integer(IC16$Occupation_Number30)
IC16$ImmStat<-as.factor(IC16$ImmStat)
IC16[is.na(IC16)]<-0
Occupation_Number<-matrix(1:25, nrow=25, ncol=1)
Years01q<-matrix(2001, nrow=25, ncol=1)
Years06q<-matrix(2006, nrow=25, ncol=1)
Cit <- matrix("Citizen", nrow=25, ncol=1)
Imm <- matrix("Immigrant", nrow=25, ncol=1)
#2001q
C01qdf<-cbind(Occupation_Number,C01qmean,Cit)
C01qdf<-as.data.frame(C01qdf)
colnames(C01qdf)<-c("Occupation_Number","Average Income","ImmStat")
I01qdf<-cbind(Occupation_Number,I01qmean,Imm)
I01qdf<-as.data.frame(I01qdf)
colnames(I01qdf)<-c("Occupation_Number","Average Income","ImmStat")
IC01q<-bind_rows(C01qdf,I01qdf)
IC01q<-as.data.frame(IC01q)
IC01q$Occupation_Number<-factor(IC01q$Occupation_Number, levels=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25))
IC01q$`Average Income`<-as.numeric(IC01q$`Average Income`)
IC01q$Occupation_Number<-as.integer(IC01q$Occupation_Number)
IC01q$ImmStat<-as.factor(IC01q$ImmStat)
# 2006q
C06qdf<-cbind(Occupation_Number,C06qmean,Cit)
C06qdf<-as.data.frame(C06qdf)
colnames(C06qdf)<-c("Occupation_Number","Average Income","ImmStat")
I06qdf<-cbind(Occupation_Number,I06qmean,Imm)
I06qdf<-as.data.frame(I06qdf)
colnames(I06qdf)<-c("Occupation_Number","Average Income","ImmStat")
IC06q<-bind_rows(C06qdf,I06qdf)
IC06q<-as.data.frame(IC06q)
IC06q$Occupation_Number<-factor(IC06q$Occupation_Number, levels=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25))
IC06q$`Average Income`<-as.numeric(IC06q$`Average Income`)
IC06q$Occupation_Number<-as.integer(IC06q$Occupation_Number)
IC06q$ImmStat<-as.factor(IC06q$ImmStat)
#201q1
Occupation_Number30<-matrix(1:30, nrow=30, ncol=1)
Years11q<-matrix(2011, nrow=30, ncol=1)
Years16q<-matrix(2016, nrow=30, ncol=1)
Cit2 <- matrix("Citizen", nrow=30, ncol=1)
Imm2 <- matrix("Immigrant", nrow=30, ncol=1)
C11qdf<-cbind(Occupation_Number30,C11qmean,Cit2)
C11qdf<-as.data.frame(C11qdf)
colnames(C11qdf)<-c("Occupation_Number30","Average Income","ImmStat")
I11qdf<-cbind(Occupation_Number30,I11qmean,Imm2)
I11qdf<-as.data.frame(I11qdf)
colnames(I11qdf)<-c("Occupation_Number30","Average Income","ImmStat")
IC11q<-bind_rows(C11qdf,I11qdf)
IC11q<-as.data.frame(IC11q)
IC11q$Occupation_Number30<-factor(IC11q$Occupation_Number30, levels=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30))
IC11q$`Average Income`<-as.numeric(IC11q$`Average Income`)
IC11q$Occupation_Number30<-as.integer(IC11q$Occupation_Number30)
IC11q$ImmStat<-as.factor(IC11q$ImmStat)
C16qdf<-cbind(Occupation_Number30,C16qmean,Cit2)
C16qdf<-as.data.frame(C16qdf)
colnames(C16qdf)<-c("Occupation_Number30","Average Income","ImmStat")
I16qdf<-cbind(Occupation_Number30,I16qmean,Imm2)
I16qdf<-as.data.frame(I16qdf)
colnames(I16qdf)<-c("Occupation_Number30","Average Income","ImmStat")
IC16q<-bind_rows(C16qdf,I16qdf)
IC16q<-as.data.frame(IC16q)
IC16q$Occupation_Number30<-factor(IC16q$Occupation_Number30, levels=c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30))
IC16q$`Average Income`<-as.numeric(IC16q$`Average Income`)
IC16q$Occupation_Number30<-as.integer(IC16q$Occupation_Number30)
IC16q$ImmStat<-as.factor(IC16q$ImmStat)
IC16q[is.na(IC16q)]<-0
#2001
ggplot(IC01, aes(x = Occupation_Number, y = `Average Income`, colour = ImmStat)) +
geom_line() +
scale_x_continuous(breaks = unique(IC01$Occupation_Number), labels = as.integer(unique(IC01$Occupation_Number))) +
labs(
title = "2001 immigrant and citizen Average income per occupation"
) +
theme_minimal()
ggplot(IC06, aes(x = Occupation_Number, y = `Average Income`, colour = ImmStat)) +
geom_line() +
scale_x_continuous(breaks = unique(IC06$Occupation_Number), labels = as.integer(unique(IC06$Occupation_Number))) +
labs(
title = "2006 immigrant and citizen Average income per occupation"
) +
theme_minimal()
#2011
ggplot(IC11, aes(x = Occupation_Number30, y = `Average Income`, colour = ImmStat)) +
geom_line() +
scale_x_continuous(breaks = unique(IC11$Occupation_Number30), labels = as.integer(unique(IC11$Occupation_Number30))) +
labs(
title = "2011 immigrant and citizen Average income per occupation"
) +
theme_minimal()
ggplot(IC16, aes(x = Occupation_Number30, y = `Average Income`, colour = ImmStat)) +
geom_line() +
scale_x_continuous(breaks = unique(IC16$Occupation_Number30), labels = as.integer(unique(IC16$Occupation_Number30))) +
labs(
title = "2016 immigrant and citizen Average income per occupation"
) +
theme_minimal()
ggplot(IC01q,
aes(x = Occupation_Number, y = `Average Income`, colour = ImmStat)) +
geom_line() +
scale_x_continuous(breaks = unique(IC01q$Occupation_Number),
labels = as.integer(unique(IC01q$Occupation_Number))) +
labs(title = "2000 immigrant and citizen Average income per occupation") +
theme_minimal()
ggplot(IC06q, aes(x = Occupation_Number, y = `Average Income`, colour = ImmStat)) +
geom_line() +
scale_x_continuous(breaks = unique(IC06q$Occupation_Number), labels = as.integer(unique(IC06q$Occupation_Number))) +
labs(
title = "2006 immigrant and citizen Average income per occupation"
) +
theme_minimal()
#2011
ggplot(IC11q, aes(x = Occupation_Number30, y = `Average Income`, colour = ImmStat)) +
geom_line() +
scale_x_continuous(breaks = unique(IC11q$Occupation_Number30), labels = as.integer(unique(IC11q$Occupation_Number30))) +
labs(
title = "2011 immigrant and citizen Average income per occupation"
) +
theme_minimal()
ggplot(IC16q, aes(x = Occupation_Number30, y = `Average Income`, colour = ImmStat)) +
geom_line() +
scale_x_continuous(breaks = unique(IC16q$Occupation_Number30), labels = as.integer(unique(IC16q$Occupation_Number30))) +
labs(
title = "2016 immigrant and citizen Average income per occupation"
) +
theme_minimal()
For consistency and accuracy of four census years, new occupation characters is assigned for each occupation. Then the same process is continued for the adjusted occupations as seen above.
TESTadjj01<- filter(df01, age>14, age<87, hours<4, wage<8888887, occupation<26, immpop<4, wage>2)
test01adjj <- TESTadjj01[ , -c(1,2,4,6)]
TESTadjj06<- filter(df06, age>5, age<87, hours<4, wage<8888887, occupation<26, immpop<4, wage>2)
test06adjj <- TESTadjj06[ , -c(1,2,4,6)]
TESTadjj11<- filter(df11, age>5, age<87, hours<4, wage<8888887, occupation<31, immpop<4, wage>2)
test11adjj <- TESTadjj11[ , -c(1,2,4,6)]
TESTadjj16<- filter(df16, age>6, age<87, hours<4, wage<88888887, occupation<31, immpop<4, wage>2)
test16adjj <- TESTadjj16[ , -c(1,2,4,6)]
The occupations are reallocated to match the combined occupation list shown in the Categorization of Occupations section; 2001-2016.
NS
#2001
test01adj$occupation<-as.integer(as.integer(test01adj$occupation))
test01adj['occupation'][test01adj['occupation'] == 5]<-4
test01adj['occupation'][test01adj['occupation']==17]<-14
test01adj['occupation'][test01adj['occupation']==16]<-15
test01adj['occupation'][test01adj['occupation'] == 18 | test01adj['occupation'] ==19| test01adj['occupation'] ==20] <-16
test01adj['occupation'][test01adj['occupation'] ==24] <-25
#2006
test06adj$occupation<-as.integer(as.integer(test06adj$occupation))
test06adj['occupation'][test06adj['occupation'] == 5]<-4
test06adj['occupation'][test06adj['occupation']==17]<-14
test06adj['occupation'][test06adj['occupation']==16]<-15
test06adj['occupation'][test06adj['occupation'] == 18 | test06adj['occupation'] ==19| test06adj['occupation'] ==20] <-16
test06adj['occupation'][test06adj['occupation'] ==24] <-25
#2011
test11adj$occupation<-as.integer(as.integer(test11adj$occupation))
test11adj['occupation'][test11adj['occupation'] == 3 | test11adj['occupation'] ==4] <-2
test11adj['occupation'][test11adj['occupation'] == 7 | test11adj['occupation'] ==8] <-6
test11adj['occupation'][test11adj['occupation'] == 10]<-9
test11adj['occupation'][test11adj['occupation'] == 15]<-14
test11adj['occupation'][test11adj['occupation'] == 20]<-18
test11adj['occupation'][test11adj['occupation'] == 21 | test11adj['occupation'] ==23] <-19
test11adj['occupation'][test11adj['occupation'] == 25]<-24
test11adj['occupation'][test11adj['occupation'] == 29]<-30
#2016
test16adj$occupation<-as.integer(as.integer(test16adj$occupation))
test16adj['occupation'][test16adj['occupation'] == 3 | test16adj['occupation'] ==4] <-2
test16adj['occupation'][test16adj['occupation'] == 7 | test16adj['occupation'] ==8] <-6
test16adj['occupation'][test16adj['occupation'] == 10]<-9
test16adj['occupation'][test16adj['occupation'] == 15]<-14
test16adj['occupation'][test16adj['occupation'] == 20]<-18
test16adj['occupation'][test16adj['occupation'] == 21 | test16adj['occupation'] ==23] <-19
test16adj['occupation'][test16adj['occupation'] == 25]<-24
test16adj['occupation'][test16adj['occupation'] == 29]<-30
Canada
#2001
test01adjj$occupation<-as.integer(as.integer(test01adjj$occupation))
test01adjj['occupation'][test01adjj['occupation'] == 5]<-4
test01adjj['occupation'][test01adjj['occupation']==17]<-14
test01adjj['occupation'][test01adjj['occupation']==16]<-15
test01adjj['occupation'][test01adjj['occupation'] == 18 | test01adjj['occupation'] ==19| test01adjj['occupation'] ==20] <-16
test01adjj['occupation'][test01adjj['occupation'] ==24] <-25
#2006
test06adjj$occupation<-as.integer(as.integer(test06adjj$occupation))
test06adjj['occupation'][test06adjj['occupation'] == 5]<-4
test06adjj['occupation'][test06adjj['occupation']==17]<-14
test06adjj['occupation'][test06adjj['occupation']==16]<-15
test06adjj['occupation'][test06adjj['occupation'] == 18 | test06adjj['occupation'] ==19| test06adjj['occupation'] ==20] <-16
test06adjj['occupation'][test06adjj['occupation'] ==24] <-25
#2011
test11adjj$occupation<-as.integer(as.integer(test11adjj$occupation))
test11adjj['occupation'][test11adjj['occupation'] == 3 | test11adjj['occupation'] ==4] <-2
test11adjj['occupation'][test11adjj['occupation'] == 7 | test11adjj['occupation'] ==8] <-6
test11adjj['occupation'][test11adjj['occupation'] == 10]<-9
test11adjj['occupation'][test11adjj['occupation'] == 15]<-14
test11adjj['occupation'][test11adjj['occupation'] == 20]<-18
test11adjj['occupation'][test11adjj['occupation'] == 21 | test11adjj['occupation'] ==23] <-19
test11adjj['occupation'][test11adjj['occupation'] == 25]<-24
test11adjj['occupation'][test11adjj['occupation'] == 29]<-30
#2016
test16adjj$occupation<-as.integer(as.integer(test16adjj$occupation))
test16adjj['occupation'][test16adjj['occupation'] == 3 | test16adjj['occupation'] ==4] <-2
test16adjj['occupation'][test16adjj['occupation'] == 7 | test16adjj['occupation'] ==8] <-6
test16adjj['occupation'][test16adjj['occupation'] == 10]<-9
test16adjj['occupation'][test16adjj['occupation'] == 15]<-14
test16adjj['occupation'][test16adjj['occupation'] == 20]<-18
test16adjj['occupation'][test16adjj['occupation'] == 21 | test16adjj['occupation'] ==23] <-19
test16adjj['occupation'][test16adjj['occupation'] == 25]<-24
test16adjj['occupation'][test16adjj['occupation'] == 29]<-30
#2001
N01adjmean <- matrix(0,0,0)
C01adjmean <- matrix(0,0,0)
I01adjmean <- matrix(0,0,0)
N01adj <- matrix(0,0,0)
C01adj <- matrix(0,0,0)
I01adj <- matrix(0,0,0)
for(i in 1:25){
occ01adj<- matrix(0,0,0)
occ01adj<- filter(test01adj, occupation ==i)
for (j in 1:3) {
N01adj<- filter(occ01adj, immpop ==3)
C01adj<- filter(occ01adj, immpop ==1)
I01adj <- filter(occ01adj, immpop==2)
N01adjmean[i]<- mean(N01adj$wage)
C01adjmean[i]<- mean(C01adj$wage)
I01adjmean[i]<-mean(I01adj$wage)
N01adjmean[is.na(N01adjmean)] <- 0
C01adjmean[is.na(N01adjmean)] <- 0
I01adjmean[is.na(N01adjmean)] <- 0
Mean2001adj<-cbind(N01adjmean, C01adjmean, I01adjmean)
Mean2001adj<-as.data.frame(Mean2001adj)
colnames(Mean2001adj)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2001adj[is.na(Mean2001adj)] <- 0
}
}
#2006
N06adjmean <- matrix(0,0,0)
C06adjmean <- matrix(0,0,0)
I06adjmean <- matrix(0,0,0)
N06adj <- matrix(0,0,0)
C06adj <- matrix(0,0,0)
I06adj <- matrix(0,0,0)
for(i in 1:25){
occ06adj<- matrix(0,0,0)
occ06adj<- filter(test06adj, occupation ==i)
for (j in 1:3) {
N06adj<- filter(occ06adj, immpop ==1)
C06adj<- filter(occ06adj, immpop ==2)
I06adj <- filter(occ06adj, immpop==3)
N06adjmean[i]<- mean(N06adj$wage)
C06adjmean[i]<- mean(C06adj$wage)
I06adjmean[i]<-mean(I06adj$wage)
N06adjmean[is.na(N06adjmean)] <- 0
C06adjmean[is.na(N06adjmean)] <- 0
I06adjmean[is.na(N06adjmean)] <- 0
I06adjmean[is.na(I06adjmean)] <- 0
C06adjmean[is.na(C06adjmean)] <- 0
Mean2006adj<-cbind(N06adjmean, C06adjmean, I06adjmean)
Mean2006adj<-as.data.frame(Mean2006adj)
colnames(Mean2006adj)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2006adj[is.na(Mean2006adj)] <- 0
}
}
#2011
N11adjmean <- matrix(0,0,0)
C11adjmean <- matrix(0,0,0)
I11adjmean <- matrix(0,0,0)
N11adj <- matrix(0,0,0)
C11adj <- matrix(0,0,0)
I11adj <- matrix(0,0,0)
for(i in 1:30){
occ11adj<- matrix(0,0,0)
occ11adj<- filter(test11adj, occupation ==i)
for (j in 1:3) {
N11adj<- filter(occ11adj, immpop ==3)
C11adj<- filter(occ11adj, immpop ==1)
I11adj <- filter(occ11adj, immpop==2)
N11adjmean[i]<- mean(N11adj$wage)
C11adjmean[i]<- mean(C11adj$wage)
I11adjmean[i]<-mean(I11adj$wage)
N11adjmean[is.na(N11adjmean)] <- 0
C11adjmean[is.na(N11adjmean)] <- 0
I11adjmean[is.na(N11adjmean)] <- 0
Mean2011adj<-cbind(N11adjmean, C11adjmean, I11adjmean)
Mean2011adj<-as.data.frame(Mean2011adj)
colnames(Mean2011adj)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2011adj[is.na(Mean2011adj)] <- 0
}
}
#2016
N16adjmean <- matrix(0,0,0)
C16adjmean <- matrix(0,0,0)
I16adjmean <- matrix(0,0,0)
N16adj <- matrix(0,0,0)
C16adj <- matrix(0,0,0)
I16adj <- matrix(0,0,0)
for(i in 1:30){
occ16adj<- matrix(0,0,0)
occ16adj<- filter(test16adj, occupation ==i)
for (j in 1:3) {
N16adj<- filter(occ16adj, immpop ==3)
C16adj<- filter(occ16adj, immpop ==1)
I16adj <- filter(occ16adj, immpop==2)
N16adjmean[i]<- mean(N16adj$wage)
C16adjmean[i]<- mean(C16adj$wage)
I16adjmean[i]<-mean(I16adj$wage)
N16adjmean[is.na(N16adjmean)] <- 0
C16adjmean[is.na(N16adjmean)] <- 0
I16adjmean[is.na(N16adjmean)] <- 0
Mean2016adj<-cbind(N16adjmean, C16adjmean, I16adjmean)
Mean2016adj<-as.data.frame(Mean2016adj)
colnames(Mean2016adj)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2016adj[is.na(Mean2016adj)] <- 0
}
}
C01adjmean<-as.data.frame(C01adjmean)
C06adjmean<-as.data.frame(C06adjmean)
C11adjmean<-as.data.frame(C11adjmean)
C16adjmean<-as.data.frame(C16adjmean)
I01adjmean<-as.data.frame(I01adjmean)
I06adjmean<-as.data.frame(I06adjmean)
I11adjmean<-as.data.frame(I11adjmean)
I16adjmean<-as.data.frame(I16adjmean)
#2001
N01adjjmean <- matrix(0,0,0)
C01adjjmean <- matrix(0,0,0)
I01adjjmean <- matrix(0,0,0)
N01adjj <- matrix(0,0,0)
C01adjj <- matrix(0,0,0)
I01adjj <- matrix(0,0,0)
for(i in 1:25){
occ01adjj<- matrix(0,0,0)
occ01adjj<- filter(test01adjj, occupation ==i)
for (j in 1:3) {
N01adjj<- filter(occ01adjj, immpop ==3)
C01adjj<- filter(occ01adjj, immpop ==1)
I01adjj <- filter(occ01adjj, immpop==2)
N01adjjmean[i]<- mean(N01adjj$wage)
C01adjjmean[i]<- mean(C01adjj$wage)
I01adjjmean[i]<-mean(I01adjj$wage)
N01adjjmean[is.na(N01adjjmean)] <- 0
C01adjjmean[is.na(N01adjjmean)] <- 0
I01adjjmean[is.na(N01adjjmean)] <- 0
Mean2001adjj<-cbind(N01adjjmean, C01adjjmean, I01adjjmean)
Mean2001adjj<-as.data.frame(Mean2001adjj)
colnames(Mean2001adjj)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2001adjj[is.na(Mean2001adjj)] <- 0
}
}
#2006
N06adjjmean <- matrix(0,0,0)
C06adjjmean <- matrix(0,0,0)
I06adjjmean <- matrix(0,0,0)
N06adjj <- matrix(0,0,0)
C06adjj <- matrix(0,0,0)
I06adjj <- matrix(0,0,0)
for(i in 1:25){
occ06adjj<- matrix(0,0,0)
occ06adjj<- filter(test06adjj, occupation ==i)
for (j in 1:3) {
N06adjj<- filter(occ06adjj, immpop ==1)
C06adjj<- filter(occ06adjj, immpop ==2)
I06adjj <- filter(occ06adjj, immpop==3)
N06adjjmean[i]<- mean(N06adjj$wage)
C06adjjmean[i]<- mean(C06adjj$wage)
I06adjjmean[i]<-mean(I06adjj$wage)
N06adjjmean[is.na(N06adjjmean)] <- 0
C06adjjmean[is.na(N06adjjmean)] <- 0
I06adjjmean[is.na(N06adjjmean)] <- 0
I06adjjmean[is.na(I06adjjmean)] <- 0
C06adjjmean[is.na(C06adjjmean)] <- 0
Mean2006adjj<-cbind(N06adjjmean, C06adjjmean, I06adjjmean)
Mean2006adjj<-as.data.frame(Mean2006adjj)
colnames(Mean2006adjj)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2006adjj[is.na(Mean2006adjj)] <- 0
}
}
#2011
N11adjjmean <- matrix(0,0,0)
C11adjjmean <- matrix(0,0,0)
I11adjjmean <- matrix(0,0,0)
N11adjj <- matrix(0,0,0)
C11adjj <- matrix(0,0,0)
I11adjj <- matrix(0,0,0)
for(i in 1:30){
occ11adjj<- matrix(0,0,0)
occ11adjj<- filter(test11adjj, occupation ==i)
for (j in 1:3) {
N11adjj<- filter(occ11adjj, immpop ==3)
C11adjj<- filter(occ11adjj, immpop ==1)
I11adjj <- filter(occ11adjj, immpop==2)
N11adjjmean[i]<- mean(N11adjj$wage)
C11adjjmean[i]<- mean(C11adjj$wage)
I11adjjmean[i]<-mean(I11adjj$wage)
N11adjjmean[is.na(N11adjjmean)] <- 0
C11adjjmean[is.na(N11adjjmean)] <- 0
I11adjjmean[is.na(N11adjjmean)] <- 0
Mean2011adjj<-cbind(N11adjjmean, C11adjjmean, I11adjjmean)
Mean2011adjj<-as.data.frame(Mean2011adjj)
colnames(Mean2011adjj)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2011adjj[is.na(Mean2011adjj)] <- 0
}
}
#2016
N16adjjmean <- matrix(0,0,0)
C16adjjmean <- matrix(0,0,0)
I16adjjmean <- matrix(0,0,0)
N16adjj <- matrix(0,0,0)
C16adjj <- matrix(0,0,0)
I16adjj <- matrix(0,0,0)
for(i in 1:30){
occ16adjj<- matrix(0,0,0)
occ16adjj<- filter(test16adjj, occupation ==i)
for (j in 1:3) {
N16adjj<- filter(occ16adjj, immpop ==3)
C16adjj<- filter(occ16adjj, immpop ==1)
I16adjj <- filter(occ16adjj, immpop==2)
N16adjjmean[i]<- mean(N16adjj$wage)
C16adjjmean[i]<- mean(C16adjj$wage)
I16adjjmean[i]<-mean(I16adjj$wage)
N16adjjmean[is.na(N16adjjmean)] <- 0
C16adjjmean[is.na(N16adjjmean)] <- 0
I16adjjmean[is.na(N16adjjmean)] <- 0
Mean2016adjj<-cbind(N16adjjmean, C16adjjmean, I16adjjmean)
Mean2016adjj<-as.data.frame(Mean2016adjj)
colnames(Mean2016adjj)<-c(c("Non Permanent Resident","Citizen","Immigrant"))
Mean2016adjj[is.na(Mean2016adjj)] <- 0
}
}
C01adjjmean<-as.data.frame(C01adjjmean)
C06adjjmean<-as.data.frame(C06adjjmean)
C11adjjmean<-as.data.frame(C11adjjmean)
C16adjjmean<-as.data.frame(C16adjjmean)
I01adjjmean<-as.data.frame(I01adjjmean)
I06adjjmean<-as.data.frame(I06adjjmean)
I11adjjmean<-as.data.frame(I11adjjmean)
I16adjjmean<-as.data.frame(I16adjjmean)
years01<-matrix(2001, nrow=19, ncol=1)
years06<-matrix(2006, nrow=19, ncol=1)
years11<-matrix(2011, nrow=19, ncol=1)
years16<-matrix(2016, nrow=19, ncol=1)
occupation_adj<-matrix(1:19, nrow=19, ncol=1)
#2001
ICdiv01adj<-(I01adjmean/C01adjmean)
ICdiv01adj<-as.data.frame(ICdiv01adj)
div01adj<-na.omit(ICdiv01adj)
adj2001<- cbind(occupation_adj, div01adj, years01)
colnames(adj2001)<-c("Occup_Num_adj", "Immpercent", "Year")
#2006
ICdiv06adj<-(I06adjmean/C06adjmean)
ICdiv06adj<-as.data.frame(ICdiv06adj)
div06adj<-na.omit(ICdiv06adj)
adj2006<- cbind(occupation_adj, div06adj, years06)
colnames(adj2006)<-c("Occup_Num_adj", "Immpercent", "Year")
#2011
ICdiv11adj<-(I11adjmean/C11adjmean)
ICdiv11adj<-as.data.frame(ICdiv11adj)
div11adj<-na.omit(ICdiv11adj)
adj2011<- cbind(occupation_adj, div11adj, years11)
colnames(adj2011)<-c("Occup_Num_adj", "Immpercent", "Year")
#2016
a<-c()
Mean2016adj$a<-c(Mean2016adj$`Non Permanent Resident`+Mean2016adj$Citizen+Mean2016adj$Immigrant)
Mean2016adj$a[Mean2016adj$a==0]<-NA
Mean2016adj<-na.omit(Mean2016adj)
ICdiv16adj<-(Mean2016adj$Immigrant/Mean2016adj$Citizen)
ICdiv16adj<-as.data.frame(ICdiv16adj)
div16adj<-na.omit(ICdiv16adj)
adj2016<- cbind(occupation_adj, div16adj, years16)
colnames(adj2016)<-c("Occup_Num_adj", "Immpercent", "Year")
#2001
ICdiv01adjj<-(I01adjjmean/C01adjjmean)
ICdiv01adjj<-as.data.frame(ICdiv01adjj)
div01adjj<-na.omit(ICdiv01adjj)
adjj2001<- cbind(occupation_adj, div01adjj, years01)
colnames(adjj2001)<-c("Occup_Num_adjj", "Immpercent", "Year")
#2006
ICdiv06adjj<-(I06adjjmean/C06adjjmean)
ICdiv06adjj<-as.data.frame(ICdiv06adjj)
div06adjj<-na.omit(ICdiv06adjj)
adjj2006<- cbind(occupation_adj, div06adjj, years06)
colnames(adjj2006)<-c("Occup_Num_adjj", "Immpercent", "Year")
#2011
ICdiv11adjj<-(I11adjjmean/C11adjjmean)
ICdiv11adjj<-as.data.frame(ICdiv11adjj)
div11adjj<-na.omit(ICdiv11adjj)
adjj2011<- cbind(occupation_adj, div11adjj, years11)
colnames(adjj2011)<-c("Occup_Num_adjj", "Immpercent", "Year")
#2016
a<-c()
Mean2016adjj$a<-c(Mean2016adjj$`Non Permanent Resident`+Mean2016adjj$Citizen+Mean2016adjj$Immigrant)
Mean2016adjj$a[Mean2016adjj$a==0]<-NA
Mean2016adjj<-na.omit(Mean2016adjj)
ICdiv16adjj<-(Mean2016adjj$Immigrant/Mean2016adjj$Citizen)
ICdiv16adjj<-as.data.frame(ICdiv16adjj)
div16adjj<-na.omit(ICdiv16adjj)
adjj2016<- cbind(occupation_adj, div16adjj, years16)
colnames(adjj2016)<-c("Occup_Num_adjj", "Immpercent", "Year")
allyear<-bind_rows(adj2001,adj2006,adj2011,adj2016)
allyearj<-bind_rows(adjj2001,adjj2006,adjj2011,adjj2016)
ggplot(allyear) +
aes(
x = Occup_Num_adj,
y = Immpercent,
fill = Year,
colour = Year,
group = Year
) +
geom_hline(yintercept = 1)+
geom_line() +
scale_x_continuous(labels=as.character(occupation_adj),breaks=occupation_adj)+
scale_fill_distiller(palette = "Spectral", direction = 1) +
scale_color_distiller(palette = "Spectral", direction = 1) +
labs(
x = "Occupation Number",
y = "% of Immigrant Income to Citizen Income",
title = "% Difference in Immigrant and Citizen income by Occupation",
color = "Census Year"
) +
theme_minimal() +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
ggplot(allyearj) +
aes(
x = Occup_Num_adjj,
y = Immpercent,
fill = Year,
colour = Year,
group = Year
) +
geom_hline(yintercept = 1)+
geom_line() +
scale_x_continuous(labels=as.character(occupation_adj),breaks=occupation_adj)+
scale_fill_distiller(palette = "Spectral", direction = 1) +
scale_color_distiller(palette = "Spectral", direction = 1) +
labs(
x = "Occupation Number",
y = "% of Immigrant Income to Citizen Income",
title = "% Difference in Immigrant and Citizen income by Occupation",
color = "Census Year"
) +
theme_minimal() +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
allyear2<-bind_cols(occupation_adj,div01adj,div06adj,div11adj, div16adj)
colnames(allyear2)=c('Occ', '2001', '2006', '2011', '2016')
datatable(allyear2) %>% formatStyle(c('2001','2006','2011','2016'), backgroundColor = styleInterval(1, c('white', 'lightblue')))
allyear2j<-bind_cols(occupation_adj,div01adjj,div06adjj,div11adjj, div16adjj)
colnames(allyear2j)=c('Occ', '2001', '2006', '2011', '2016')
datatable(allyear2j) %>% formatStyle(c('2001','2006','2011','2016'), backgroundColor = styleInterval(1, c('white', 'lightblue')))